#Biden Models Only Non-Imputed
#MAIN TEXT - Aug 29

if (!require(pacman)) install.packages("pacman")

pacman::p_load(tidyverse, # tidyverse 
               plm, # panel data analysis
               pglm, # generalized panel data analysis
               clubSandwich, # robust SEs
               here, # computational reproducibility 
               glue, # gluing objects and strings 
               tidylog, # logging analysis
               naniar, # missing data 
               zeallot, # multiple assignments 
               readxl, 
               ggpubr,
               hrbrthemes, 
               wesanderson,
               broom, 
               sjPlot, 
               jtools, 
               patchwork, 
               broom.mixed, 
               estimatr, 
               stargazer,
               DeclareDesign,
               sensemakr,
               mice,
               usmap,
               janitor,
               modelsummary, 
               flextable,
               officer,
               extrafont,
               gt,
               bootstrap,
               lme4,
               clipr,
               infer)

source(here("functions/utils.r"))
source(here("functions/theme.R"))

ggplot2::theme_set(theme_bw())

# no scientific notation
options(scipen = 999)

#load files
df <- read.csv("/Users/vivleung/Downloads/dataverse_files (1)/panel_data_jul24_2.csv")

#check measurements
df2 <- subset(df, wave == 2)

t.test(subset(df, wave == 2)$apa.discrim.rona, subset(df, wave == 3)$apa.discrim.rona) 

ch_score <- data.frame(ch_score = subset(df, wave == 2)$apa.discrim.rona - subset(df, wave == 3)$apa.discrim.rona)

ch_score %>%
  ggplot(aes(x = ch_score)) +
  geom_bar() +
  scale_x_binned() +
  labs(x = "Change score between waves 2 and 3",
       y = "Count")

dv <- subset(df, wave == 1)$gendiscrim

covat2 <- df %>%
  filter(wave == 2) %>%
  select(apa.discrim.rona, usborn, edu, DEM, GOP, age, male, chinese, indian)

covat3 <- df %>%
  filter(wave == 3) %>%
  select(apa.discrim.rona, usborn, edu, DEM, GOP, age, male, chinese, indian)

covat2$gendiscrim <- dv
covat3$gendiscrim <- dv

# factorize dummy variables
covat2 <- factorize_dummy(covat2)
covat3 <- factorize_dummy(covat3)

mods_exo <- list(
  
  "2nd wave" = lm(apa.discrim.rona ~ gendiscrim + usborn + edu + DEM + GOP + age + male + chinese + indian, data = covat2),
  
  "3rd wave" = lm(apa.discrim.rona ~ gendiscrim + usborn + edu + DEM + GOP + age + male + chinese + indian, data = covat3)
  
)

var.list <- names(df)[str_detect(names(df), "disc")][c(1, 3)]

var.list <- c(var.list, c("lostjob", "incomereduc", "knowrona", "linkedfate", "idimport", "asnpride", "rona.behav.sneeze", "rona.behav.language", "rona.behav.walk", "rona.behav.transit", "rona.behav.other", "rona.behav.nochange", "X2020likelyvote", "biden", "DEM", "DEM.strong", "GOP", "GOP.strong", "ronaunfair", "ronaunfairasian", "rona.apa.mistreat", "apa.responsible", "apa.have.rona", "apa.harassment", "whiteaffect", "blackaffect", "latinoaffect", "asianaffect"))

group_sum <- purrr::map_dfr(seq(var.list), group_mean) %>%
  mutate(variable = rep(var.list, each = 3)) %>%
  filter(!is.na(mean)) 

#mark non-registered as NA
table(df$votereg, df$wave)
df$nonvoter <- NA 
df$nonvoter <- ifelse(df$votereg==1,1, NA)
table(df$nonvoter, df$wave)

## remove non-voters marked as NA, keep wave 2 because question wasn't asked
to_remove <- NA 
to_remove <- df %>%
  filter(is.na(df$nonvoter) & wave != 2) %>%
  select(pid)


## mark people who didnt vote in election as NA
df$didntvote <- NA
df$didntvote <- df$X2020likelyvote
df$didntvote[df$X2020likelyvote==0 & df$wave==3]<- NA
table(df$didntvote, df$wave)
summary(df$didntvote)

to_remove_2 <-NA
to_remove_2 <- df %>%
  filter(is.na(df$didntvote) & df$wave!=1) %>%
           select(pid)
###
table(df$votereg, df$wave)
table(df$nonvoter, df$wave)

#df <- df %>% filter(!is.na(nonvoter) & wave!=2)
#df <- df %>% filter(!is.na(didntvote) & wave!=1)


##
df %>%
  filter(wave != 1) %>%
  select(gendiscrim, apa.discrim.rona, usborn, edu, DEM, GOP, age, male, natorigin, biden, `X2020likelyvote`) %>%
  vis_miss()

df$likely_vote <- df$X2020likelyvote

# create a proxy variable
df$proxy <- rep(subset(df, wave == 1)$gendiscrim, 3)

## check the proxy data distribution
df$proxy %>% table()

df$prior[df$proxy == 0.5] <- "Middle"
df$prior[df$proxy < 0.5] <- "Low"
df$prior[df$proxy > 0.5] <- "High"

min(df$proxy) ; max(df$proxy)

df$usborn <- factor(df$usborn)
df$male <- factor(df$male)
df$chinese <- factor(df$chinese)
df$indian <- factor(df$indian)
df$GOP <- factor(df$GOP)
df$DEM <- factor(df$DEM)
df$wave <- factor(df$wave)
df$nat_origin <- factor(df$natorigin)

model.outs <- cal_model_outputs(df)
model.outs.low <- cal_model_outputs(df %>% filter(prior == "Low"))
model.outs.middle <- cal_model_outputs(df %>% filter(prior == "Middle"))
model.outs.high <- cal_model_outputs(df %>% filter(prior == "High"))

cm <- c("apa.discrim.rona" = "COVID Discrim",
        "gendiscrim" = "General Discrim (Post COVID)",
        "usborn1" = "US Born",
        "edu" = "Education",
        "DEM1" = "Democratic Party",
        "GOP1" = "Republican Party",
        "age" = "Age",
        "male1" = "Male",
        "wave3" = "Wave3",
        "proxy" = "Wave 1 General Discrim",
        "apa.discrim.rona:proxy" = "COVID Discrim* Pre-COVID General Discrim",
        "chinese1" = "Chinese",
        "indian1" = "Indian")


##
save <- df

df <- anti_join(df, to_remove) 
df <- anti_join(df, to_remove_2)

## Table 4
biden_models <- list(
  
  "Logit" = glm(biden ~ gendiscrim + apa.discrim.rona + gendiscrim + usborn + edu + DEM + GOP + age + male + wave + nat_origin, data = df, family = binomial),
  
  "Weighted" = glm(biden ~ gendiscrim + apa.discrim.rona + gendiscrim + usborn + edu + DEM + GOP + age + male + wave + nat_origin, data = df, family = binomial, weight = weights),
  
  "Fixed" = plm(biden ~ gendiscrim + apa.discrim.rona, data = df, index = c("pid", "wave"), model = "within"),    
  
  "Interaction" = glm(biden ~  apa.discrim.rona + gendiscrim + usborn + edu + DEM + GOP + age + male + wave + proxy + apa.discrim.rona:proxy + nat_origin, data = df, family = binomial)
  
)

suppressWarnings({
  
modelsummary(biden_models,
             fmt = 3,
             coef_map = cm,
             coef_omit = "Intercept|nat_origin",
             statistic = c("p = {p.value}"),
             output = here("outputs", "table4.docx"),
             vcov = "robust")

})

table(df$wave) #number of participants 
